The following is a replicate of this statistics explained article: https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Culture_statistics_-_cultural_employment#Cultural_employment_.E2.80.93_current_state_and_latest_developments

# install and load packages
pacman::p_load(restatapi,     # for Eurostat data
        giscoR,       # for Eurostat geographical Data
        ggplot2,      # for graphics
        highcharter,  # for interactive elements in graphics
        tidyverse,    # for data cleaning
        # eurostat, 
        cowplot,       # for layering maps with "ggdraw"
        writexl,       # for exporting data as excel files
        kableExtra,     # for Table formatting
        webshot2,
        magick
        )
## Installing package into '/srv/rlibs'
## (as 'lib' is unspecified)
## 
## magick installed
## Warning in pacman::p_load(restatapi, giscoR, ggplot2, highcharter, tidyverse, : Failed to install/load:
## magick
# color palette
clrs <- read.csv("estat_colour_themes.txt",sep="\t",stringsAsFactors = F)
image(c(1:6),c(1:nrow(clrs)),matrix(1:(6*nrow(clrs)),6,nrow(clrs)),col=as.vector(t(apply(as.matrix(clrs[,c(3:8)]),2,rev))), xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n",
      main = "Eurostat colour palettes")
text(c(0.5), c(1:nrow(clrs)), adj=c(0,1),rev(clrs$theme_desc), col = clrs$text_light)
text(c(1.5), c(1:nrow(clrs)), adj=c(0,1),rev(clrs$theme_desc), col = clrs$text_dark)

col <- as.vector(clrs[1, 4:8])

1. Map 1

#load data
id1 <- "cult_emp_sex"
date1 <- 2022

cult_emp_map <- get_eurostat_data(id1, filters = list(sex = "T", unit = "PC_EMP", time = date1, label = T))


# filter data for needed variables  
cult_emp_map <- cult_emp_map %>% 
             filter(geo != "EU27_2020") %>% 
             select(geo, values) 


# 1. first possibility with nuts data
# get nuts data
# nuts <- gisco_get_nuts(year = 2021, nuts_level = 0, resolution = "1")
nuts <- gisco_get_countries(year = 2020,  resolution = "10")

# merge the nuts data with the cultural employment data
merged_data <- merge(nuts, cult_emp_map, 
                     by.x = "CNTR_ID", by.y = "geo", all.x = TRUE)

data.table::setnames(merged_data,c("geo",colnames(merged_data)[2:14]))

# create a data frame with only Malta for augmentation
malta <- merged_data[merged_data$geo == "MT", ]


# create a map that shows Malta only
malta_map <- ggplot(malta) +
  geom_sf(aes(fill = values)) + 
  scale_fill_gradientn( colours = col[2:3], breaks = c(3.45, 3.75, 4.05, 4.35, 4.65)) +
  theme_bw() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        panel.grid = element_blank(), 
        axis.ticks = element_blank(), 
        legend.position = "none") 


# create a map of Europe
europe_map <- ggplot(merged_data) +
  geom_sf(aes(fill = values)) +
  scale_fill_gradientn( colours = col[2:3], breaks = c(3.45, 3.75, 4.05, 4.35, 4.65),
                      labels = c("≥ 4.6", "4.1 -< 4.6","4 -< 4.1", "3.5 -< 4", "< 3.5"),
                      na.value = "lightgrey") +
  coord_sf(crs = 3035, xlim = c(2377294, 7453440), ylim = c(1313597, 5628510)) +
  labs(title = "Cultural employment, ",date1,"", subtitle = "(% of total employment)") +
  guides(fill = guide_legend(keywidth = unit(0.6, "cm"), keyheight = unit(0.3, "cm"), 
                             title="EU = 3.8")) +
  annotate("text", x = 2380000, y = 1350000, 
         label = "Definition differs for Spain and France (see LFS metadata).",
         size = 2, color = "black", hjust = 0) +
  annotate("text", x = 2380000, y = 1300000, 
           label = "Eurostat (online data code: cult_emp_sex)",
           size = 2, color = "black", hjust = 0) +
  annotate("text", x = 5900000, y = 1350000, 
           label = "Administrative boundaries: © EuroGeographics © UN–FAO © Turkstat", 
           size = 2, color = "black", hjust = 0) +
  annotate("text", x = 5900000, y = 1300000, 
           label = "Cartography: Eurostat – IMAGE, 07/2023", 
           size = 2, color = "black", hjust = 0) +
theme(legend.position = c(0.7, 0.55), 
      legend.text = element_text(size = 5),
      legend.title = element_text(size = 7),
      legend.key.size = unit(0.3, "cm"),
      legend.background = element_blank(),
      axis.text.x = element_blank(),
      axis.text.y = element_blank(),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      panel.grid = element_blank(), 
      axis.ticks = element_blank(), 
      panel.background = element_blank()
      ) 
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# combine the map of Europe and of Malta
map1 <- ggdraw(europe_map) +
  draw_plot(malta_map, .6, .7, .7, .1, 
            width = 0.3, height = 0.3) 
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '≥ 4.6' in 'mbcsToSbcs': dot substituted for <e2>
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '≥ 4.6' in 'mbcsToSbcs': dot substituted for <89>
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '≥ 4.6' in 'mbcsToSbcs': dot substituted for <a5>
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '≥ 4.6' in 'mbcsToSbcs': dot substituted for <e2>
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '≥ 4.6' in 'mbcsToSbcs': dot substituted for <89>
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '≥ 4.6' in 'mbcsToSbcs': dot substituted for <a5>
# export png 
ggsave("png/map1.png", plot = map1, width = 8, height = 6, 
       dpi = 1000, create.dir = T)
knitr::include_graphics("png/map1.png")

# export data in excel
write_xlsx(merged_data, path = "pdf/data_cult_emp_map1.xlsx")


# to do:
# 1. find better map data that includes all countries + neighbouring countries
# © EuroGeographics © UN–FAO © Turkstat
# 2. fix coloring according to the eurostat color scheme
# 3. adjust layout according to the layout rules

2. Figure 1

# specify information outside of plot
id1 <- "cult_emp_sex"
date2 <- 2019:2022
col_fig1 <- clrs[1, 4:6]   # doenst work yet

title_fig1 <-   paste0("Cultural employment ‒ annual rates of change, ",
                       date2[2],"-",date2[length(date2)]) 
subtitle_fig1 <-  "(%)"
caption_fig1 <-  paste0("Break in time series in 2021 for all countries for which data are available due to the implementation of the new Regulation (EU)2019/1700, also called the Integrated European Social Statistics Framework Regulation", 
                        "\n(IESS FR) (see LFS metadata)",
                        "\n(\U00B9) 2020: Break in time series.", 
                        "\n(\U00B2) 2021, 2022: Definition differs (see LFS metadata).", 
                        "\n",
                        "\nSource: Eurostat (online data code: ",id1,")")



# read in data
cult_empl_fig1 <- get_eurostat_data(id1, filters = list(time = date2, sex = "T", unit = "THS_PER", label = T))



# calculate the rate of change: (value_currentyear−value_previousyear)/value_previousyear*100
cult_empl_fig1 <-  cult_empl_fig1 %>%
                            group_by(geo) %>%
                            mutate(rate_of_change = (values - lag(values)) / lag(values) * 100) %>%
                            filter(!is.na(rate_of_change))


# add NA lines for North Macedonia, Turkey and Montenegro
NA_fig1 <- data.frame(
  geo = c("MK", "MK", "TR", "TR", "ME", "ME"),
  unit = "THS_PERS",
  sex = "T", 
  time = c(2021, 2022, 2021, 2022, 2021, 2022),
  values = NA, 
  rate_of_change = NA)

NA_fig1[, 1:4] <- lapply(NA_fig1[, 1:4], factor)

dt_sep<-data.table::data.table(geo=c(" ","  ","   "),unit = "THS_PERS", sex="T",time = c(2020, 2021, 2022),values=rep(NA,3),rate_of_change=rep(NA,3))

dt_sep$time<-as.factor(dt_sep$time)

cult_empl_fig1 <- bind_rows(NA_fig1, cult_empl_fig1,dt_sep)

# Specify the desired order of countries
desired_order <- c("EU27_2020"," ","CY", "LU", "IE", "SE", "NL", "PL", "LT", 
                   "DE", "IT", "RO", "PT", "MT", "FR", "AT", "FI", "DK", 
                   "SK", "ES", "EL",
                   "BE", "SI", "HU", "LV", "EE", "HR", 
                   "CZ", "BG","  ", "NO", "IS", "CH", "   ", "RS", "MK", "TR", "ME")


# Reorder the 'geo' variable with 'desired order'
cult_empl_fig1$geo <- factor(cult_empl_fig1$geo, levels = desired_order)


# add country labels 
geo_labels <- c("EU27_2020" = "EU"," "=" " , "CY" = "Cyprus", "LU" = "Luxemburg", 
                "IE" = "Ireland", "SE" = "Sweden", "NL" = "Netherlands", 
                "PL" = "Poland", "LT" = "Lithuania", 
                "DE" = "Germany", "IT" = "Italy", "RO" = "Romania", 
                "PT" = "Portugal", "MT" = "Malta", "FR" = "France", 
                "AT" = "Austria", "FI" = "Finland", "DK" = "Denmark", 
                "SK" = "Slovakia", "ES" = "Spain", "EL" = "Greece",
                "BE" = "Belgium", "SI" = "Slovenia", "HU" = "Hungary",
                "LV" = "Latvia", "EE" = "Estonia", "HR" = "Croatia", 
                "CZ" = "Czechia", "BG" = "Bulgaria", "  "="  ","NO" = "Norway", 
                "IS" = "Iceland", "CH" = "Switzerland", "   "="   ", "RS" = "Serbia", 
                "MK" = "North Macedonia", "TR" = "Turkiye", "ME" = "Montenegro")


# Rename specific countries
geo_labels["DE"] <- "Germany (\U00B9)"
geo_labels["FR"] <- "France (\U00B2)"
geo_labels["ES"] <- "Spain (\U00B2)"
geo_labels["IS"] <- "Iceland (\U00B9)"



# Barplot
fig1 <- ggplot(cult_empl_fig1 , aes(x = geo, y = rate_of_change, fill = time)) + 
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_hline(yintercept = seq(-22, 18, by = 5), color = "gray", linetype = "dashed",linewidth = 0.1)+
  labs(title = title_fig1, 
       subtitle = subtitle_fig1,
       caption = caption_fig1,
        fill = "Year") +
  scale_y_continuous(breaks = seq(-22, 18, by = 5)) +
  scale_x_discrete(labels = geo_labels) + 
 scale_fill_manual(values = c("2020" = "#B09120", "2021" = "#2644A7", "2022" = "#E04040"), 
 # scale_fill_manual(values = c(date[2] = col_fig1[1], date[3] = col_fig1[2], date[4] = col_fig1[3]), # doesnt work like this
                    labels = c("2020" = "2019-2020", "2021" = "2020-2021", "2022" = "2021-2022")) +   # take the years out somehow? 
  theme_bw() +
  theme(legend.background = element_blank(),
        legend.title = element_blank(),
        legend.text = element_text(face = "bold", size = 12),
        legend.position = "bottom",
        legend.key.size = unit(0.3, "cm"),
       # legend.spacing = unit(0.8, "cm"),
        plot.title = element_text(face = "bold", size = 18),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
       # axis.ticks.x.bottom = element_line(),
      #  axis.line.x = element_line(),
        panel.grid = element_blank(), 
        axis.ticks = element_blank(), 
        panel.background = element_blank(),
        #plot.margin = margin(40, 150, 80, 40),
        plot.caption=element_text(hjust = 0), 
      panel.border = element_blank()
      ) 

# export png 
ggsave("png/fig1.png", plot = fig1, width = 8, height = 6, 
       dpi = 1000, create.dir = T)
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_bar()`).
knitr::include_graphics("png/fig1.png")

# export data in excel
write_xlsx(cult_empl_fig1, path = "pdf/data_cult_empl_fig.xlsx")

# to do: 
# spacing between countries
# layout details --> fonts, sizes, etc.
# right coloring scheme 

3. Table 1

# read in data
id1 <- "cult_emp_sex"
date3 <- 2019:2022

cult_emp_tab1 <- get_eurostat_data(id1, filters = list(time = date3, sex = "T", label = T))

cult_emp_tab1 <- cult_emp_tab1 %>% filter(geo != "UK")


cntr_order <- as.factor(c("EU27_2020","BE", "BG","CZ","DK", "DE", "EE","IE","EL","ES","FR",
                          "HR", "IT","CY","LV","LT","LU","HU","MT","NL","AT","PL","PT","RO",
                          "SI","SK","FI","SE","IS","NO","CH","ME", "MK", "RS","TR"))

table(cult_emp_tab1$geo)
## 
##        AT        BE        BG        CH        CY        CZ        DE        DK 
##         8         8         8         8         8         8         8         8 
##        EE        EL        ES EU27_2020        FI        FR        HR        HU 
##         8         8         8         8         8         8         8         8 
##        IE        IS        IT        LT        LU        LV        ME        MK 
##         8         8         8         8         8         8         4         4 
##        MT        NL        NO        PL        PT        RO        RS        SE 
##         8         8         8         8         8         8         8         8 
##        SI        SK        TR        UK 
##         8         8         4         0
# create df with all countries to find NAs
cntr_compl <- data.frame(geo = cntr_order)

# create subsets for restructuring & Perform a left join to include all subgroup countries
cult_emp_tab1_time1_pc <- cult_emp_tab1[cult_emp_tab1$time == date3[1] & cult_emp_tab1$unit == "PC_EMP"] %>%                           select(-c(time, sex, unit))  

cult_emp_tab1_time2_pc <- cult_emp_tab1[cult_emp_tab1$time == date3[2] & cult_emp_tab1$unit == "PC_EMP"]  %>% select(-c(time, sex, unit)) 
cult_emp_tab1_time2_pc <- cntr_compl %>%
  left_join(cult_emp_tab1_time2_pc, by = "geo")

cult_emp_tab1_time3_pc <- cult_emp_tab1[cult_emp_tab1$time == date3[3] & cult_emp_tab1$unit == "PC_EMP"] %>% select(-c(time, sex, unit))  
cult_emp_tab1_time3_pc <- cntr_compl %>%
  left_join(cult_emp_tab1_time3_pc, by = "geo")


cult_emp_tab1_time4_pc <- cult_emp_tab1[cult_emp_tab1$time == date3[4] & cult_emp_tab1$unit == "PC_EMP"] %>% select(-c(time, sex, unit)) 
cult_emp_tab1_time4_pc <- cntr_compl %>%
  left_join(cult_emp_tab1_time4_pc, by = "geo")


cult_emp_tab1_time1_ths <- cult_emp_tab1[cult_emp_tab1$time == date3[1] & cult_emp_tab1$unit == "THS_PER"] %>% select(-c(time, sex, unit)) 

cult_emp_tab1_time2_ths <- cult_emp_tab1[cult_emp_tab1$time == date3[2] & cult_emp_tab1$unit == "THS_PER"] %>% select(-c(time, sex, unit)) 
cult_emp_tab1_time2_ths <- cntr_compl %>%
  left_join(cult_emp_tab1_time2_ths, by = "geo")

cult_emp_tab1_time3_ths <- cult_emp_tab1[cult_emp_tab1$time == date3[3] & cult_emp_tab1$unit == "THS_PER"] %>% select(-c(time, sex, unit))  
cult_emp_tab1_time3_ths <- cntr_compl %>%
  left_join(cult_emp_tab1_time3_ths, by = "geo")

cult_emp_tab1_time4_ths <- cult_emp_tab1[cult_emp_tab1$time == date3[4] & cult_emp_tab1$unit == "THS_PER"] %>% select(-c(time, sex, unit)) 
cult_emp_tab1_time4_ths <- cntr_compl %>%
  left_join(cult_emp_tab1_time4_ths, by = "geo")


# reorder all of them
cult_emp_tab1_time1_pc <- cult_emp_tab1_time1_pc[order(factor(cult_emp_tab1_time1_pc$geo, levels = cntr_order)), ] %>% select(-geo)
cult_emp_tab1_time2_pc <- cult_emp_tab1_time2_pc[order(factor(cult_emp_tab1_time2_pc$geo, levels = cntr_order)), ] %>% select(-geo)
cult_emp_tab1_time3_pc <- cult_emp_tab1_time3_pc[order(factor(cult_emp_tab1_time3_pc$geo, levels = cntr_order)), ] %>% select(-geo)
cult_emp_tab1_time4_pc <- cult_emp_tab1_time4_pc[order(factor(cult_emp_tab1_time4_pc$geo, levels = cntr_order)), ] %>% select(-geo)

cult_emp_tab1_time1_ths <- cult_emp_tab1_time1_ths[order(factor(cult_emp_tab1_time1_ths$geo, levels = cntr_order)), ] %>% select(-geo)
cult_emp_tab1_time2_ths <- cult_emp_tab1_time2_ths[order(factor(cult_emp_tab1_time2_ths$geo, levels = cntr_order)), ] %>% select(-geo)
cult_emp_tab1_time3_ths <- cult_emp_tab1_time3_ths[order(factor(cult_emp_tab1_time3_ths$geo, levels = cntr_order)), ] %>% select(-geo)
cult_emp_tab1_time4_ths <- cult_emp_tab1_time4_ths[order(factor(cult_emp_tab1_time4_ths$geo, levels = cntr_order)), ] %>% select(-geo)

cntr <- as.factor(c("EU", "Belgium", "Bulgaria", "Czechia", "Denmark", "Germany", "Estonia", "Ireland",
                    "Greece", "Spain", "France", "Croatia", "Italy", "Cyprus", "Latvia", "Lithuania",
                    "Luxembourg", "Hungary", "Malta", "Netherlands", "Austria", "Poland","Portugal", "Romania",
                    "Slovenia", "Slovakia", "Finland", "Sweden", "Iceland",
                    "Norway", "Switzerland", "Montenegro", "North Macedonia", "Serbia", "Turkey"))


tab1 <- cbind(cntr, cult_emp_tab1_time1_ths, cult_emp_tab1_time2_ths, cult_emp_tab1_time3_ths, cult_emp_tab1_time4_ths,
              cult_emp_tab1_time1_pc, cult_emp_tab1_time2_pc, cult_emp_tab1_time3_pc, cult_emp_tab1_time4_pc)

colnames(tab1) <- c(" ", "ths1", "ths2","ths3","ths4","pc1", "pc2","pc3","pc4")

tab1 <- tab1 %>%
  mutate(across(2:9, as.numeric))


# create 2 new colums
tab1 <- tab1 %>%
  mutate(change_percent = ifelse(is.na(ths3) | is.na(ths4), NA, ((ths4 - ths3) / ths3) * 100),
         change_absolute = ifelse(is.na(ths3) | is.na(ths4), NA, pc4 - pc3))


tab1[,10] <- round(tab1[, 10], 1)
tab1[, 11] <- round(tab1[, 11], 1)

tab1[is.na(tab1)] <- ":"


# change colum names again
colnames(tab1) <- c("", "2019", "2020", "2021", "2022", "2019", "2020", "2021", "2022", "%", "pp")

tab1 %>%
  kable("html") %>%
  kable_styling(c("bordered")) %>%
  add_header_above(c(" " = 1, "(thousands)" = 4, "(%)" = 4, "Change in the number of employees 2021-2022" = 1, "Change in the share of total employment 2021-2022" = 1), background = '#FAEBD7') %>%
  add_header_above(c(" " = 1, "Number" = 4, "Share of total employment" = 4, " " = 2), background = '#FAEBD7') %>%
  row_spec(0, background = '#FAEBD7') %>%  
  row_spec(1, background = '#FFDEAD', bold = TRUE) %>%
  column_spec(1, bold = TRUE) %>%
  group_rows(" ", 29, 30) %>%
  group_rows(" ", 32, 33) %>% 
  footnote(general = paste("Note: a break in time series for all countries for which 2021 data are available due to the implementation of the new Regulation (EU)2019/1700, also called the integrated European Social",
                           "(1) 2019: break in time series.", "(1) 2020: break in time series.", "(2) 2021, 2022: definition differs (see LFS metadata)", "\n", "Source: Eurostat (online data code: ", id1, ")")) 
Number
Share of total employment
(thousands)
(%)
Change in the number of employees 2021-2022
Change in the share of total employment 2021-2022
2019 2020 2021 2022 2019 2020 2021 2022 % pp
EU 7358.5 7135.8 7401.3 7732.6 3.7 3.6 3.7 3.8 4.5 0.1
Belgium 204.6 214.0 210.3 210 4.2 4.5 4.3 4.2 -0.1 -0.1
Bulgaria 88.5 92.1 91.3 84.3 2.7 2.9 3 2.7 -7.7 -0.3
Czechia 192.7 203.0 209.7 194.3 3.6 3.9 4 3.8 -7.3 -0.2
Denmark 125.5 125.5 132.3 135 4.4 4.4 4.6 4.5 2 -0.1
Germany 1681.5 1572.0 1614.5 1714.5 4.0 3.8 3.9 4 6.2 0.1
Estonia 33.5 33.3 32.2 30.5 5.1 5.1 4.9 4.5 -5.3 -0.4
Ireland 76.0 73.7 73.1 83.3 3.3 3.2 3.1 3.3 14 0.2
Greece 129.4 123.4 144.4 144.7 3.3 3.2 3.7 3.5 0.2 -0.2
Spain 714.3 665.7 728.9 733 3.6 3.5 3.7 3.6 0.6 -0.1
France 960.2 972.3 1087.5 1142.1 3.5 3.6 3.9 4 5 0.1
Croatia 60.4 64.2 55.9 52.4 3.6 3.9 3.3 3.1 -6.3 -0.2
Italy 834.5 791.1 771.3 814.9 3.6 3.5 3.4 3.5 5.7 0.1
Cyprus 14.0 13.2 14.9 18.1 3.4 3.2 3.5 4 21.5 0.5
Latvia 32.0 33.4 36.3 35.4 3.5 3.7 4.2 4 -2.5 -0.2
Lithuania 53.7 53.9 54.2 57.6 3.9 4.0 4 4.1 6.3 0.1
Luxembourg 14.6 12.4 13.1 15 5.1 4.2 4.3 4.8 14.5 0.5
Hungary 164.0 174.7 166.4 163.5 3.6 3.9 3.6 3.5 -1.7 -0.1
Malta 13.3 12.7 11.9 12.5 5.2 4.9 4.4 4.4 5 0
Netherlands 429.9 420.5 471.4 520.8 4.8 4.7 5.1 5.4 10.5 0.3
Austria 181.3 162.5 169.8 175.7 4.2 3.8 3.9 4 3.5 0.1
Poland 548.1 538.1 541.6 576.9 3.3 3.3 3.3 3.4 6.5 0.1
Portugal 167.4 173.4 187.7 197.8 3.4 3.6 3.9 4 5.4 0.1
Romania 135.7 120.4 111.2 117.3 1.6 1.4 1.4 1.5 5.5 0.1
Slovenia 48.6 50.5 45.5 44.9 4.9 5.2 4.7 4.6 -1.3 -0.1
Slovakia 73.8 71.4 74.8 75.9 2.9 2.8 2.9 2.9 1.5 0
Finland 131.5 127.9 119.2 122.6 5.1 5.1 4.6 4.6 2.9 0
Sweden 249.3 240.8 231.8 259.5 4.9 4.8 4.5 4.9 11.9 0.4
Iceland 11.1 11.2 11.4 11.8 5.5 5.8 5.8 5.6 3.5 -0.2
Norway 104.9 106.4 112.5 123.4 3.9 3.9 4 4.3 9.7 0.3
Switzerland 252.6 241.4 228.6 217.5 5.4 5.1 4.9 4.6 -4.9 -0.3
Montenegro 8.1 7.8 : : 3.3 3.5 : : : :
North Macedonia 25.4 26.2 : : 3.2 3.3 : : : :
Serbia 90.9 95.7 91.8 102.2 3.1 3.3 3.2 3.5 11.3 0.3
Turkey 669.0 645.2 : : 2.4 2.4 : : : :
Note:
Note: a break in time series for all countries for which 2021 data are available due to the implementation of the new Regulation (EU)2019/1700, also called the integrated European Social (1) 2019: break in time series. (1) 2020: break in time series. (2) 2021, 2022: definition differs (see LFS metadata)
Source: Eurostat (online data code: cult_emp_sex )
# export png 



# export data in excel
write_xlsx(tab1, path = "pdf/tab1.xlsx")



# Problems
# design 
# export

4. Figure 2

# read in data
id2 <- "cult_emp_n2"
date4 <- 2012:2022
# col_fig2 <-             # i need 7 colours but there are only 6 per line? 


cult_emp_n2 <- get_eurostat_data(id2, filters = list(time = date4,nace_r2 = c("C18", "J58", "J59", "J60", "M74", "R90", "R91"), geo = "EU27_2020", label = T))


# create labels and order for nace_r2 as a factor
cult_emp_n2$nace_r2 <- factor(cult_emp_n2$nace_r2, levels = c('R90', 'M74', 'C18', 'J58', 'R91', 'J59', 'J60'), 
                                                              labels = c(
                                                                  "Creative, arts and entertainment activities (NACE, R90)",
                                                                 "Other professional, scientific and technical activities \n(NACE, M74.1, M74.2, M74.3)",
                                                                  "Printing and reproduction of record media (NACE,\nC18)",
                                                                  "Selected publishing activities (NACE, part of J58)",
                                                                 "Libraries, archives, museums and other cultural\nactivities (NACE, R91)",
                                                                 "Motion picture, video and television programme\nproduction, sound recording and music publishing\nactivities (NACE, J59)",
                                                                 "Programming and broadcasting activities (NACE, J60)"))


# Plot
fig2 <- cult_emp_n2 %>%
  ggplot( aes(x=time, y= values, group= nace_r2, color=nace_r2)) +
  geom_line(size = 2) +
  # scale_color_viridis(discrete = TRUE) +
#  ggtitle("Evolution of cultural employment by selected NACE Rev. 2 activities, EU, 2012-2022", "(thousands)") + 
  labs(title = "Evolution of cultural employment by selected NACE Rev. 2 activities, EU, 2012-2022", 
       subtitle = "(thousands)", 
       caption = paste0("Note: a break in time series for all countries for which 2021 data are available due to the implementation of the new Regulation (EU)2019/1700, also called the integrated European Social Statistics Framework Regulation (IESS FR)", "\n(see LFS metadata)","\n", "\nSource: Eurostat(online data code: ",id2,"")) +
  scale_y_continuous(breaks = seq(0, 1200, by = 200), expand = c(0, 0)) +
  scale_x_discrete(expand = c(0, 0)) +
  geom_hline(yintercept = seq(0, 1200, by = 200), color = "black", linetype = "dashed") +
  theme(legend.background = element_blank(),
        legend.title = element_blank(),
        legend.text = element_text(face = "bold", size = 8),
        legend.key.height = unit(3.5, "lines"),
        #legend.position.inside = c(0.9, 0.9),
        plot.title = element_text(face = "bold", size = 12),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks.x.bottom = element_line(),
        axis.line.x = element_line(),
        panel.grid = element_line(), 
        axis.ticks = element_blank(), 
        panel.background = element_blank(),
       # plot.margin = margin(40, 150, 80, 40),
        plot.caption=element_text(hjust = 0)) 
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# export png 
ggsave("png/fig2.png", plot = fig2, width = 8, height = 6, 
       dpi = 1000, create.dir = T)
knitr::include_graphics("png/fig2.png")

# export data in excel
write_xlsx(cult_emp_n2, path = "pdf/data_cult_empl_fig2.xlsx")


# to do: 
# adjust layout / colours / fonts

5. Figure 3

#lfsa_egan2 <- get_eurostat_data("lfsa_egan2")
#cult_emp_age <- get_eurostat_data("cult_emp_age")
#lfsa_egan<- get_eurostat_data("lfsa_egan")
#cult_emp_edu <- get_eurostat_data("cult_emp_edu")
#lfsa_egaed <- get_eurostat_data("lfsa_egaed")



# Plot sex

#cult_emp_sex_fig3 <- cult_empl_sex %>% 
#                     filter(time == 2022, geo %in% "EU27_2020", unit %in% "PC_EMP", 
#                            sex != "T")
#

#lfsa_egaed_22 <- lfsa_egaed %>% filter(time == 2022, geo %in% "EU27_2020")
                  
#tot_empl_fig3 <- 


#fig3 <- ggplot(cult_emp_sex_fig3, aes(x = "", y = values, fill = sex)) +
#  geom_bar(stat = "identity", width = 0.5)

## momentary challenge: which dataset do I use to get the total employment values? 

6. Figure 4

#id1 <- cult_emp_sex
date6 <- 2012:2022

# filter for needed variables 
cult_emp_fig4 <- get_eurostat_data(id1, filters = list(time = date6, geo = "EU27_2020", unit = "THS_PER", sex = c("M", "F"))) 
 

# create factor for sex with the right order and labels
cult_emp_fig4$sex <- factor(cult_emp_fig4$sex, levels = c("M", "F"), 
       labels = c("Male", "Female"))


# the line plot
fig4 <- cult_emp_fig4 %>%
  ggplot( aes(x=time, y= values, group= sex, color= sex)) +
  geom_line(size = 2) +
  geom_point(size = 3) +
  labs(title = "Evolution of cultural employment in the EU by sex, 2012-2022", 
       subtitle = "(thousands)", 
       caption = paste0("Note: a break in time series for all countries for which 2021 data are available due to the implementation of the new Regulation (EU)2019/1700, also called the integrated \nEuropean Social Statistics Framework Regulation (IESS FR) (see LFS metadata)", "\n",
"\nSource: Eurostat(online data code: ",id1,")")) +
  scale_y_continuous(breaks = seq(2500, 4500, by = 200), limits = c(2500, 4500), expand = c(0, 0)) +
  scale_x_discrete(expand = c(0, 0)) +
  geom_hline(yintercept = seq(2500, 4500, by = 200), color = "black", linetype = "dashed") +
  theme(legend.text = element_text(face = "bold", size = 8),
        #legend.key.height = unit(3.5, "lines"),
        plot.title = element_text(face = "bold", size = 12),
        legend.background = element_blank(),
      #  legend.position.inside = c(0.8, 0.9),
        legend.title = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks.x.bottom = element_line(),
        axis.line.x = element_line(),
        panel.grid = element_line(), 
        axis.ticks = element_blank(), 
        panel.background = element_blank(),
       # plot.margin = margin(40, 150, 80, 40),
        plot.caption=element_text(hjust = 0)) # +
       # coord_cartesian(xlim = c(2012, 2022), clip = 'off')


# export png 
ggsave("png/fig4.png", plot = fig4, width = 10, height = 6, 
       dpi = 1000, create.dir = T)
knitr::include_graphics("png/fig4.png")

# export data in excel
write_xlsx(cult_emp_fig4, path = "pdf/data_cult_emp_fig4.xlsx")


# Problems left: 
# how can I have the full dots at the beginning and the end without having a longer x-axis? 
# how to move the legend farther up? 
# layout/fonts/ colours are not right yet

7. Table 2

id8 <- "cult_emp_wsta"
date7 <- 2022


cult_emp_tab2 <- get_eurostat_data(id8, filters = list(time = date7, sex = "T", label = T))

cult_emp_tab2 <- cult_emp_tab2 %>% select(-c(sex, unit, time))


cntr_order_tab2 <- as.factor(c("EU27_2020","BE", "BG","CZ","DK", "DE", "EE","IE","EL","ES","FR",
                          "HR", "IT","CY","LV","LT","LU","HU","MT","NL","AT","PL","PT","RO",
                          "SI","SK","FI","SE","IS","NO","CH","RS"))


# create subsets
tab2_self_clt <- cult_emp_tab2[cult_emp_tab2$ind_type == "SELF" & cult_emp_tab2$indic_em == "EMP_CLT"]
tab2_self_lfs <- cult_emp_tab2[cult_emp_tab2$ind_type == "SELF" & cult_emp_tab2$indic_em == "EMP_LFS"]

tab2_ft_clt <- cult_emp_tab2[cult_emp_tab2$ind_type == "EMP_FT" & cult_emp_tab2$indic_em == "EMP_CLT"]
tab2_ft_lfs <- cult_emp_tab2[cult_emp_tab2$ind_type == "EMP_FT" & cult_emp_tab2$indic_em == "EMP_LFS"]

tab2_perm_clt <- cult_emp_tab2[cult_emp_tab2$ind_type == "SAL_PERM" & cult_emp_tab2$indic_em == "EMP_CLT"]
tab2_perm_lfs <- cult_emp_tab2[cult_emp_tab2$ind_type == "SAL_PERM" & cult_emp_tab2$indic_em == "EMP_LFS"]

tab2_1_clt <- cult_emp_tab2[cult_emp_tab2$ind_type == "EMP_JOB1" & cult_emp_tab2$indic_em == "EMP_CLT"]
tab2_1_lfs <- cult_emp_tab2[cult_emp_tab2$ind_type == "EMP_JOB1" & cult_emp_tab2$indic_em == "EMP_LFS"]

# reorder 
tab2_self_clt <- tab2_self_clt[order(factor(tab2_self_clt$geo, levels = cntr_order_tab2))] %>% select(-c(geo, ind_type, indic_em))
tab2_self_lfs <- tab2_self_lfs[order(factor(tab2_self_lfs$geo, levels = cntr_order_tab2))] %>% select(-c(geo, ind_type, indic_em))

tab2_ft_clt <- tab2_ft_clt[order(factor(tab2_ft_clt$geo, levels = cntr_order_tab2))] %>% select(-c(geo, ind_type, indic_em))
tab2_ft_lfs <- tab2_ft_lfs[order(factor(tab2_ft_lfs$geo, levels = cntr_order_tab2))] %>% select(-c(geo, ind_type, indic_em))

tab2_perm_clt <- tab2_perm_clt[order(factor(tab2_perm_clt$geo, levels = cntr_order_tab2))] %>% select(-c(geo, ind_type, indic_em))
tab2_perm_lfs <- tab2_perm_lfs[order(factor(tab2_perm_lfs$geo, levels = cntr_order_tab2))] %>% select(-c(geo, ind_type, indic_em))

tab2_1_clt <- tab2_1_clt[order(factor(tab2_1_clt$geo, levels = cntr_order_tab2))] %>% select(-c(geo, ind_type, indic_em))
tab2_1_lfs <- tab2_1_lfs[order(factor(tab2_1_lfs$geo, levels = cntr_order_tab2))] %>% select(-c(geo, ind_type, indic_em))




tab2_cntr <- as.factor(c("EU", "Belgium", "Bulgaria", "Czechia", "Denmark", "Germany", "Estonia", "Ireland",
                    "Greece", "Spain", "France", "Croatia", "Italy", "Cyprus", "Latvia", "Lithuania",
                    "Luxembourg", "Hungary", "Malta", "Netherlands", "Austria", "Poland","Portugal", "Romania",
                    "Slovenia", "Slovakia", "Finland", "Sweden", "Iceland",
                    "Norway", "Switzerland", "Serbia"))



tab2 <- cbind(cntr, tab2_self_clt, tab2_self_lfs, tab2_ft_clt, tab2_ft_lfs, tab2_perm_clt, tab2_perm_lfs, tab2_1_clt, tab2_1_lfs)
## Warning: Item 2 has 32 rows but longest item has 35; recycled with remainder.
## Warning: Item 3 has 32 rows but longest item has 35; recycled with remainder.
## Warning: Item 4 has 32 rows but longest item has 35; recycled with remainder.
## Warning: Item 5 has 32 rows but longest item has 35; recycled with remainder.
## Warning: Item 6 has 32 rows but longest item has 35; recycled with remainder.
## Warning: Item 7 has 32 rows but longest item has 35; recycled with remainder.
## Warning: Item 8 has 32 rows but longest item has 35; recycled with remainder.
## Warning: Item 9 has 32 rows but longest item has 35; recycled with remainder.
colnames(tab2) <- c(" ", "Cultural employment", "Total employment", "Cultural employment", "Total employment", "Cultural employment", "Total employment", "Cultural employment", "Total employment")

tab2 %>%
  kable("html") %>%
  kable_styling(c("bordered")) %>%
  add_header_above(c(" " = 1, "Self-employed persons" = 2, "Employed persons working full-time" = 2, "Employees with a permanent job" = 2, "Employed persons with one job only" = 2), background = '#FAEBD7') %>%
  row_spec(0, background = '#FAEBD7') %>%  
  row_spec(1, background = '#FFDEAD', bold = TRUE) %>%
  column_spec(1, bold = TRUE) %>%
  group_rows(" ", 29, 30) %>%
  group_rows(" ", 32, 33) %>% 
  footnote(general = paste("(1) Definition differs (see LFS metadata)", "\n", "Source: Eurostat (online data code: ", id8, ")")) 
Self-employed persons
Employed persons working full-time
Employees with a permanent job
Employed persons with one job only
Cultural employment Total employment Cultural employment Total employment Cultural employment Total employment Cultural employment Total employment
EU 31.7 13.8 76.5 81.5 81.5 85.9 93.1 96.1
Belgium 32.2 14.4 74.9 75.6 86.8 90.2 91.3 94.8
Bulgaria 17.2 10.6 95.8 98.2 97.3 96.0 98.9 99.6
Czechia 37.5 15.5 85.7 92.8 91.1 93.3 92.9 97.1
Denmark 20.6 8.3 67.6 74.6 85.0 89.0 86.8 92.1
Germany 28.4 8.4 66.7 70.5 83.4 87.6 92.9 95.4
Estonia 21.6 10.4 72.5 84.7 98.3 96.9 88.7 93.1
Ireland 32.7 13.1 76.3 78.7 90.3 91.5 94.0 96.5
Greece 32.6 27.3 76.7 91.8 83.3 89.9 94.1 98.4
Spain 31.7 15.0 82.4 86.5 76.0 78.9 94.7 97.4
France 29.7 12.8 78.0 82.7 72.6 83.8 92.3 95.6
Croatia 23.1 12.3 89.7 94.4 85.8 85.2 98.7 98.5
Italy 46.2 20.6 80.5 81.8 81.7 83.2 97.1 98.8
Cyprus 26.4 10.2 74.9 89.6 90.0 88.6 94.2 97.6
Latvia 24.8 12.7 84.3 92.3 96.8 97.3 95.2 95.9
Lithuania 23.3 11.7 84.7 93.1 98.8 98.1 88.5 94.3
Luxembourg 21.1 9.1 74.7 81.2 88.4 92.6 94.7 96.2
Hungary 27.1 12.3 92.0 94.7 96.6 94.5 97.9 98.7
Malta 34.4 15.0 81.4 87.7 91.3 91.9 90.7 95.5
Netherlands 47.2 16.0 60.7 56.6 67.3 72.2 85.5 89.8
Austria 31.7 10.9 64.2 69.5 88.8 91.2 90.4 94.7
Poland 23.7 18.8 89.4 93.6 81.8 84.5 95.3 95.8
Portugal 29.7 14.5 87.0 92.2 78.1 83.5 91.3 95.0
Romania 12.5 11.9 96.7 96.4 99.5 97.8 99.7 99.4
Slovenia 31.9 12.0 84.7 90.5 83.2 88.1 94.4 95.5
Slovakia 37.2 14.9 91.3 96.4 92.9 95.5 97.6 98.6
Finland 27.7 12.8 77.0 80.8 81.6 83.5 87.6 92.4
Sweden 27.3 10.0 70.4 77.0 83.8 84.1 92.0 93.9
Iceland 33.0 12.5 73.1 77.1 83.9 86.9 88.9 90.0
Norway 18.2 4.4 67.6 74.3 89.8 91.5 91.1 93.5
Switzerland 25.0 13.8 46.2 60.7 86.7 86.4 84.6 92.5
Montenegro 23.8 16.4 87.5 93.0 74.8 78.1 93.9 93.0
North Macedonia 31.7 13.8 76.5 81.5 81.5 85.9 93.1 96.1
Serbia 32.2 14.4 74.9 75.6 86.8 90.2 91.3 94.8
Turkey 17.2 10.6 95.8 98.2 97.3 96.0 98.9 99.6
Note:
(1) Definition differs (see LFS metadata)
Source: Eurostat (online data code: cult_emp_wsta )
# export png 



# export data in excel
write_xlsx(tab2, path = "pdf/tab2.xlsx")

8. Table 3